###Replication Code for Vaccine Hesitancy and Monetary Incentives
rm(list = ls())


library(stargazer) # Nice regression tables
library(tidyverse)  # data manipulation
library(cluster)    # clustering algorithms
library(factoextra) # clustering algorithms & visualization

df <- read.csv("SurveyData.csv")


###First sub-setting by condition and separating out Hesitants from Acceptants

y1 <- subset(df, Control == 1)

y2 <- subset(df, Free == 1)

y3 <- subset(df, Efficacy == 1)

y4 <- subset(df, Side_Effects == 1)

n1 <- subset(df, Control == 0)

n2 <- subset(df, Free == 0)

n3 <- subset(df, Efficacy == 0)

n4 <- subset(df, Side_Effects == 0)

d1 <- rbind(y1,n1)
d2 <- rbind(y2,n2)
d3 <- rbind(y3,n3)
d4 <- rbind(y4,n4)

nn<-rbind(n1,n2,n3,n4)

#####First regression- Table 3


mylogit1 <- glm(Control ~ Vax.are.safe + Vax.prevent.disease+Public.Health.is.good+Diseases.aren.t.severe.so.no.vax+Every.day.stress.so.no.vax+Cleaning.hands+Social.distance+Mask+Social.protect.vax+ Healthcare.Trust + Govt.Trust + Public.Health.Trust+ Age + Income+Politics+Gender_1+Gender_2, data = d1, family = "binomial")
mylogit2 <- glm(Free ~ Vax.are.safe + Vax.prevent.disease+Public.Health.is.good+Diseases.aren.t.severe.so.no.vax+Every.day.stress.so.no.vax+Cleaning.hands+Social.distance+Mask+Social.protect.vax+ Healthcare.Trust + Govt.Trust + Public.Health.Trust+ Age + Income+Politics+Gender_1+Gender_2, data = d2, family = "binomial")
mylogit3 <- glm(Efficacy ~ Vax.are.safe + Vax.prevent.disease+Public.Health.is.good+Diseases.aren.t.severe.so.no.vax+Every.day.stress.so.no.vax+Cleaning.hands+Social.distance+Mask+Social.protect.vax+ Healthcare.Trust + Govt.Trust + Public.Health.Trust+ Age + Income+Politics+Gender_1+Gender_2, data = d3, family = "binomial")
mylogit4 <- glm(Side_Effects ~ Vax.are.safe + Vax.prevent.disease+Public.Health.is.good+Diseases.aren.t.severe.so.no.vax+Every.day.stress.so.no.vax+Cleaning.hands+Social.distance+Mask+Social.protect.vax+ Healthcare.Trust + Govt.Trust + Public.Health.Trust+ Age + Income+Politics+Gender_1+Gender_2, data = d4, family = "binomial")

stargazer(mylogit1,mylogit2,mylogit3,mylogit4)


##Now separating Reluctants and Unwillings - Table 6

###Replace NA's to 0 and any positive WTA to 1 for logit regression

nn$WTAL[is.na(nn$WTA) ] <- 0
nn$WTAL[nn$WTA<1001] <- 1


mylogit5 <- glm(WTAL ~ Vax.are.safe + Vax.prevent.disease+Public.Health.is.good+Diseases.aren.t.severe.so.no.vax+Every.day.stress.so.no.vax+Cleaning.hands+Social.distance+Mask+Social.protect.vax+ Healthcare.Trust + Govt.Trust + Public.Health.Trust+ Age + Income+Politics+Gender_1+Gender_2, data = nn, family = "binomial")


stargazer(mylogit5)

####Clustering Analysis
####For Clustering we will only use revealed preference behaviours and standard demographic variables
n1clust <- n1[ c(21,22,23,34,35) ]

n2clust <- n2[ c(21,22,23,34,35) ]

n3clust <- n3[ c(21,22,23,34,35) ]

n4clust <- n4[ c(21,22,23,34,35) ]


##To check elbow and silhouette plots for optimal clustering

fviz_nbclust(n1clust,kmeans, method="wss")
fviz_nbclust(n2clust,kmeans, method="wss")
fviz_nbclust(n3clust,kmeans, method="wss")
fviz_nbclust(n4clust,kmeans, method="wss")

fviz_nbclust(n1clust,kmeans, method="silhouette")
fviz_nbclust(n2clust,kmeans, method="silhouette")
fviz_nbclust(n3clust,kmeans, method="silhouette")
fviz_nbclust(n4clust,kmeans, method="silhouette")




##Now perform actual clustering
n1sclust<-scale(n1clust)
n1k <- kmeans(n1sclust, centers = 2, nstart = 25)
n1kvector<-n1k$cluster
n1kvector<-as.data.frame(n1kvector)
n1kvector1 <- subset(n1kvector, n1kvector == 1)
n1kvector2 <- subset(n1kvector, n1kvector == 2)

n1kvector1 <- tibble::rownames_to_column(n1kvector1, "X")
n1kvector2 <- tibble::rownames_to_column(n1kvector2, "X")
n11join = merge(x=n1kvector1,y=n1,by="X",all.x=TRUE)
n12join = merge(x=n1kvector2,y=n1,by="X",all.x=TRUE)

summary(n11join$WTA)
summary(n12join$WTA)

n2sclust<-scale(n2clust)
n2k <- kmeans(n2sclust, centers = 2, nstart = 25)

n2kvector<-n2k$cluster
n2kvector<-as.data.frame(n2kvector)

n2kvector1 <- subset(n2kvector, n2kvector == 1)

n2kvector2 <- subset(n2kvector, n2kvector == 2)

n2kvector1 <- tibble::rownames_to_column(n2kvector1, "X")
n2kvector2 <- tibble::rownames_to_column(n2kvector2, "X")
n21join = merge(x=n2kvector1,y=n2,by="X",all.x=TRUE)
n22join = merge(x=n2kvector2,y=n2,by="X",all.x=TRUE)

summary(n21join$WTA)
summary(n22join$WTA)

n3sclust<-scale(n3clust)
n3k <- kmeans(n3clust, centers = 2, nstart = 25)

n3kvector<-n3k$cluster
n3kvector<-as.data.frame(n3kvector)

n3kvector1 <- subset(n3kvector, n3kvector == 1)

n3kvector2 <- subset(n3kvector, n3kvector == 2)

n3kvector1 <- tibble::rownames_to_column(n3kvector1, "X")
n3kvector2 <- tibble::rownames_to_column(n3kvector2, "X")
n31join = merge(x=n3kvector1,y=n3,by="X",all.x=TRUE)
n32join = merge(x=n3kvector2,y=n3,by="X",all.x=TRUE)

summary(n31join$WTA)
summary(n32join$WTA)

n4sclust<-scale(n4clust)
n4k <- kmeans(n4sclust, centers = 2, nstart = 25)

n4kvector<-n4k$cluster
n4kvector<-as.data.frame(n4kvector)

n4kvector1 <- subset(n4kvector, n4kvector == 1)

n4kvector2 <- subset(n4kvector, n4kvector == 2)

n4kvector1 <- tibble::rownames_to_column(n4kvector1, "X")
n4kvector2 <- tibble::rownames_to_column(n4kvector2, "X")
n41join = merge(x=n4kvector1,y=n4,by="X",all.x=TRUE)
n42join = merge(x=n4kvector2,y=n4,by="X",all.x=TRUE)

summary(n41join$WTA)
summary(n42join$WTA)

###Plots


n11$Condition<-"Control"
n22$Condition<-"Free"
n33$Condition<-"Efficacy"
n44$Condition<-"No Side Effects"

plotWTA <- rbind(n11,n22,n33,n44)

ggplot(plotWTA, aes(WTA, colour = Condition)) + stat_ecdf()


####values directly of CDF

Percentage_Vaccinated1 <- c(70.0, 73.24,74.05,76.48,77.29,78.1,79.72,80.044,82.15,82.96,86.2)
Control <- c(0,5.2,50,200,200,300,500,500,1000,1000,1000)
Percentage_Vaccinated2 <- c(68.3, 71.32,72.075,74.34,75.85,77.36,79.625,80.38,83.4)
Free <- c(0,102.4,197.75,500,500,981,1000,1000,1000)
Percentage_Vaccinated3 <- c(74.5, 76.74,77.3,78.98,79.54,80.1,81.22,82.9,83.46,85.7)
Efficacy <- c(0,177,201,501,506.5,671.5,968,1000,1000,1000)
Percentage_Vaccinated4 <- c(75.0, 77.38,77.975,79.76,80.236,80.95,82.14,83.925,84.52,86.9)
No_Side_Effects <- c(0,90.2,101,303,500,503,851.8,1000,1000,1000)

Control <- cbind(Percentage_Vaccinated1,Control)
Control<-as.data.frame(Control)
Control$Condition <- c("Control")
names(Control)[1] <- "Percentage_Vaccinated"
names(Control)[2] <- "WTA"

Free <- cbind(Percentage_Vaccinated2,Free)
Free<-as.data.frame(Free)
Free$Condition <- c("Free")
names(Free)[1] <- "Percentage_Vaccinated"
names(Free)[2] <- "WTA"

Efficacy <- cbind(Percentage_Vaccinated3,Efficacy)
Efficacy<-as.data.frame(Efficacy)
Efficacy$Condition <- c("Efficacy")
names(Efficacy)[1] <- "Percentage_Vaccinated"
names(Efficacy)[2] <- "WTA"

No_Side_Effects <- cbind(Percentage_Vaccinated4,No_Side_Effects)
No_Side_Effects <-as.data.frame(No_Side_Effects)
No_Side_Effects$Condition <- c("No_Side_Effects")
names(No_Side_Effects)[1] <- "Percentage_Vaccinated"
names(No_Side_Effects)[2] <- "WTA"




plotz<- rbind(Control,Free,Efficacy,No_Side_Effects)

ggplot(plotz,aes(x=WTA,y=Percentage_Vaccinated))+geom_line(aes(col=Condition),size=1.2)+labs(y="Percent Who Would Vaccinate", x="Monetary Incentive") +geom_point(aes(x=1000, y=83.4), colour="#00BFC4",size=1.9)+geom_point(aes(x=1000, y=85.7), colour="#7CAE00",size=1.9)+geom_point(aes(x=1000, y=86.2), colour="#F8766D",size=1.9)+geom_point(aes(x=1000, y=86.9), colour="#C77CFF",size=1.9)

